The goal of the analyses in this notebook is to identify predictors of frog survival following translocation. The analyses are conducted in a Bayesian framework using the R package rstanarm.

Load packages

library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.7     ✔ dplyr   1.0.9
✔ tidyr   1.2.0     ✔ stringr 1.4.0
✔ readr   2.1.2     ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(brms)
Loading required package: Rcpp
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
Loading 'brms' package (version 2.17.0). Useful instructions
can be found by typing help('brms'). A more detailed introduction
to the package is available through vignette('brms_overview').

Attaching package: ‘brms’

The following object is masked from ‘package:stats’:

    ar
library(rstanarm)
This is rstanarm version 2.21.3
- See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
- Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
- For execution on a local, multicore CPU with excess RAM we recommend calling
  options(mc.cores = parallel::detectCores())

Attaching package: ‘rstanarm’

The following objects are masked from ‘package:brms’:

    dirichlet, exponential, get_y, lasso, ngrps
library(bayesplot)
This is bayesplot version 1.9.0
- Online documentation and vignettes at mc-stan.org/bayesplot
- bayesplot theme set to bayesplot::theme_default()
   * Does _not_ affect other ggplot2 plots
   * See ?bayesplot_theme_set for details on theme setting
library(tidybayes)

Attaching package: ‘tidybayes’

The following objects are masked from ‘package:brms’:

    dstudent_t, pstudent_t, qstudent_t, rstudent_t
library(loo)
This is loo version 2.5.1
- Online documentation and vignettes at mc-stan.org/loo
- As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session. 
library(broom.mixed)
library(janitor)

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
source("classification_summary.R")

Read in frog translocation dataset

# Read the cleaned translocation dataset, drop records lacking a Bd load
# measurement, and derive the response, indicator, standardized, and centered
# variables used by the models below.
d1 <- read_csv(here::here("data", "clean", "frog_translocation_final.csv")) %>% 
  drop_na(bd_load) %>%  # drop records where bd_load is NA — TODO confirm how many records / which site(s) (comment previously said "xx records from 70xxx")
  mutate(
    first = if_else(order == 1, 1, 0),          # 1 = first translocation at the site
    surv = if_else(survival < 0.5, 0, 1),       # binarize survival for logistic regression
    male = if_else(sex == "m", 1, 0),
    elev_lo = if_else(elevation < 3020, 1, 0),  # using interval defined by cut_interval, n = 2
    elev_z = arm::rescale(elevation),           # standardize continuous predictors (arm::rescale: center & divide by 2 SD)
    snowt_z = arm::rescale(snow_t),
    snowt1_z = arm::rescale(snow_t1),
    day_z = arm::rescale(day),
    length_z = arm::rescale(length),
    bdload_l = log10(bd_load + 1),              # +1 so zero loads are defined on the log scale
    bdload_z = arm::rescale(bdload_l),
    shore_c = shore - mean(shore),              # center binary predictors on their sample means
    male_c = male - mean(male),
    elevlo_c = elev_lo - mean(elev_lo),
    first_c = first - mean(first),
    # anonymous function instead of `round, 3`: passing extra arguments through
    # `...` in across() is deprecated in dplyr >= 1.1
    across(c(shore_c, male_c, elevlo_c, first_c), ~ round(.x, 3)), 
    across(c(site_id, shore_c, first_c, donor, male_c, elevlo_c, year), as.factor),
    surv = as.integer(surv))
Rows: 779 Columns: 16── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr   (2): pit_tag_ref, sex
dbl  (13): site_id, elevation, shore, snow_t, snow_t1, year, day, order, donor, length, weight, bd_load, survival
date  (1): date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Add a descriptive translocation_id
# Build a descriptive translocation_id ("<site_id>_<sequence number>"):
# number each site's translocations in chronological (year) order, then join
# the new id back onto the full dataset.
d1 <- d1 %>% 
  distinct(site_id, year) %>% 
  arrange(site_id, year) %>% 
  group_by(site_id) %>% 
  mutate(transno = row_number()) %>%  # per-site translocation sequence number
  ungroup() %>% 
  unite(translocation_id, c("site_id", "transno"), remove = FALSE) %>% 
  select(-transno) %>% 
  mutate(translocation_id = as.factor(translocation_id)) %>% 
  inner_join(d1, by = c("site_id", "year")) %>%  # attach id to every frog record
  relocate(translocation_id, .after = site_id)

Null values - check

d1 %>% summarize(across(everything(), ~ sum(is.na(.))))

Model using non-standardized predictors

Model specification

# Bayesian logistic regression of survival on non-standardized predictors,
# using rstanarm's default weakly-informative priors (see prior_summary below).
m1a <- stan_glm(
  surv ~ length + snow_t + snow_t1 + day + bdload_l + elevation + first + male + donor,
  data = d1,
  family = binomial,
  chains = 4, 
  iter = 5000*2,  # 10000 iterations per chain; first half is warmup
  cores = 4,
  seed = 84735)
starting worker pid=10413 on localhost:11976 at 13:34:41.032
starting worker pid=10442 on localhost:11976 at 13:34:41.153
starting worker pid=10471 on localhost:11976 at 13:34:41.276
starting worker pid=10500 on localhost:11976 at 13:34:41.395

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 7.5e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.75 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 2.9e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.29 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 3.1e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.31 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 2.8e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 1.04962 seconds (Warm-up)
Chain 1:                1.09124 seconds (Sampling)
Chain 1:                2.14086 seconds (Total)
Chain 1: 
Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 1.01617 seconds (Warm-up)
Chain 2:                1.10254 seconds (Sampling)
Chain 2:                2.11871 seconds (Total)
Chain 2: 
Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 1.01876 seconds (Warm-up)
Chain 3:                1.17841 seconds (Sampling)
Chain 3:                2.19717 seconds (Total)
Chain 3: 
Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 1.03517 seconds (Warm-up)
Chain 4:                1.15732 seconds (Sampling)
Chain 4:                2.1925 seconds (Total)
Chain 4: 

Priors & chain diagnostics

prior_summary(m1a)
Priors for model 'm1a' 
------
Intercept (after predictors centered)
 ~ normal(location = 0, scale = 2.5)

Coefficients
  Specified prior:
    ~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
  Adjusted prior:
    ~ normal(location = [0,0,0,...], scale = [0.526,0.049,0.073,...])
------
See help('prior_summary.stanreg') for more details
mcmc_trace(m1a, size = 0.1)

mcmc_dens_overlay(m1a)


summary(m1a)

Model Info:
 function:     stan_glm
 family:       binomial [logit]
 formula:      surv ~ length + snow_t + snow_t1 + day + bdload_l + elevation + 
       first + male + donor
 algorithm:    sampling
 sample:       20000 (posterior sample size)
 priors:       see help('prior_summary')
 observations: 767
 predictors:   11

Estimates:
              mean   sd    10%   50%   90%
(Intercept)  -8.5    2.3 -11.5  -8.5  -5.5
length        0.0    0.0   0.0   0.0   0.0
snow_t        0.0    0.0   0.0   0.0   0.0
snow_t1       0.0    0.0   0.0   0.0   0.0
day           0.0    0.0   0.0   0.0   0.0
bdload_l     -0.1    0.1  -0.1  -0.1   0.0
elevation     0.0    0.0   0.0   0.0   0.0
first         0.4    0.2   0.1   0.4   0.7
male          0.3    0.2   0.1   0.3   0.5
donor70567   -1.3    0.4  -1.9  -1.3  -0.8
donor72996   -2.2    0.4  -2.7  -2.2  -1.7

Fit Diagnostics:
           mean   sd   10%   50%   90%
mean_PPD 0.3    0.0  0.3   0.3   0.3  

The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).

MCMC diagnostics
              mcse Rhat n_eff
(Intercept)   0.0  1.0  24916
length        0.0  1.0  25653
snow_t        0.0  1.0  22266
snow_t1       0.0  1.0  21140
day           0.0  1.0  22645
bdload_l      0.0  1.0  29642
elevation     0.0  1.0  24654
first         0.0  1.0  23563
male          0.0  1.0  28297
donor70567    0.0  1.0  20512
donor72996    0.0  1.0  18851
mean_PPD      0.0  1.0  23850
log-posterior 0.0  1.0   8254

For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
  • mcmc diagnostics all look good.

List model variable names

get_variables(m1a)
 [1] "(Intercept)"   "length"        "snow_t"        "snow_t1"       "day"           "bdload_l"      "elevation"     "first"        
 [9] "male"          "donor70567"    "donor72996"    "accept_stat__" "stepsize__"    "treedepth__"   "n_leapfrog__"  "divergent__"  
[17] "energy__"     

Posterior predictive check

pp_check(m1a, plotfun = "stat") +
    xlab("Frog survival rate")

  • Posterior predictive distribution implied by regression model closely matches original data (based on mean frog survival).

Calculate leave-one-out cross-validation (loo)

loo_m1a <- loo(m1a, cores = 4, save_psis = TRUE)
print(loo_m1a)

Computed from 20000 by 767 log-likelihood matrix
------
Monte Carlo SE of elpd_loo is 0.0.

All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.

Posterior analysis

tidy(m1a, conf.int = TRUE, conf.level = 0.90)

mcmc_intervals(m1a, point_est = "median", prob = 0.80, prob_outer = 0.9) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  # add 0-line to accentuate default line
  ggtitle("Model with non-standardized predictors only",
          "Posterior distributions with medians & 90% uncertainty intervals")


mcmc_areas(m1a, area_method = "equal height", prob = 0.90) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Model with non-standardized predictors only",
          "Posterior distributions with medians & 90% uncertainty intervals")


mean(bayes_R2(m1a))
[1] 0.2728229
  • Using non-standardized predictors makes plot hard to read due to vastly different scales of predictors.
  • Based on 90% uncertainty intervals, important predictors are snow_t1, elevation, first, donor70567, and donor72996.

Model using standardized predictors

Model specification

# Same model as m1a, but with standardized continuous predictors (_z) and
# centered binary predictors (_c) so coefficient magnitudes are comparable.
m1b <- stan_glm(
  surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + first_c + male_c + donor,
  data = d1,
  family = binomial,
  chains = 4, 
  iter = 5000*2,  # 10000 iterations per chain; first half is warmup
  cores = 4, 
  seed = 84735)
starting worker pid=10862 on localhost:11976 at 13:35:10.488
starting worker pid=10891 on localhost:11976 at 13:35:10.614
starting worker pid=10920 on localhost:11976 at 13:35:10.733
starting worker pid=10949 on localhost:11976 at 13:35:10.863

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 4.8e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.48 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 2.7e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 2.6e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.26 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 3.2e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.32 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 1.04687 seconds (Warm-up)
Chain 1:                1.09385 seconds (Sampling)
Chain 1:                2.14072 seconds (Total)
Chain 1: 
Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 1.0252 seconds (Warm-up)
Chain 2:                1.10545 seconds (Sampling)
Chain 2:                2.13065 seconds (Total)
Chain 2: 
Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 1.04144 seconds (Warm-up)
Chain 3:                1.18587 seconds (Sampling)
Chain 3:                2.22731 seconds (Total)
Chain 3: 
Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 1.04213 seconds (Warm-up)
Chain 4:                1.14531 seconds (Sampling)
Chain 4:                2.18744 seconds (Total)
Chain 4: 

Priors, chain diagnostics, posterior predictive check

prior_summary(m1b)
Priors for model 'm1b' 
------
Intercept (after predictors centered)
 ~ normal(location = 0, scale = 2.5)

Coefficients
  Specified prior:
    ~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
  Adjusted prior:
    ~ normal(location = [0,0,0,...], scale = [5.00,5.00,5.00,...])
------
See help('prior_summary.stanreg') for more details
mcmc_trace(m1b, size = 0.1)

# Fixed: was mcmc_dens_overlay(m1a) — this section diagnoses m1b
mcmc_dens_overlay(m1b)


summary(m1b)

Model Info:
 function:     stan_glm
 family:       binomial [logit]
 formula:      surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + 
       first_c + male_c + donor
 algorithm:    sampling
 sample:       20000 (posterior sample size)
 priors:       see help('prior_summary')
 observations: 767
 predictors:   11

Estimates:
               mean   sd   10%   50%   90%
(Intercept)   0.4    0.4  0.0   0.4   0.9 
length_z      0.0    0.2 -0.3   0.0   0.3 
snowt_z       0.0    0.2 -0.3   0.0   0.3 
snowt1_z     -1.3    0.2 -1.6  -1.3  -1.0 
day_z         0.2    0.2 -0.1   0.2   0.4 
bdload_z     -0.2    0.2 -0.4  -0.2   0.1 
elev_z        1.5    0.3  1.2   1.5   1.9 
first_c0.353  0.4    0.2  0.1   0.4   0.7 
male_c0.548   0.3    0.2  0.1   0.3   0.5 
donor70567   -1.3    0.4 -1.9  -1.3  -0.8 
donor72996   -2.2    0.4 -2.7  -2.2  -1.7 

Fit Diagnostics:
           mean   sd   10%   50%   90%
mean_PPD 0.3    0.0  0.3   0.3   0.3  

The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).

MCMC diagnostics
              mcse Rhat n_eff
(Intercept)   0.0  1.0  21959
length_z      0.0  1.0  25653
snowt_z       0.0  1.0  22266
snowt1_z      0.0  1.0  21140
day_z         0.0  1.0  22645
bdload_z      0.0  1.0  29642
elev_z        0.0  1.0  24654
first_c0.353  0.0  1.0  23563
male_c0.548   0.0  1.0  28297
donor70567    0.0  1.0  20512
donor72996    0.0  1.0  18851
mean_PPD      0.0  1.0  23850
log-posterior 0.0  1.0   8254

For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
  • mcmc diagnostics all look good.

List model variable names

get_variables(m1b)
 [1] "(Intercept)"   "length_z"      "snowt_z"       "snowt1_z"      "day_z"         "bdload_z"      "elev_z"        "first_c0.353" 
 [9] "male_c0.548"   "donor70567"    "donor72996"    "accept_stat__" "stepsize__"    "treedepth__"   "n_leapfrog__"  "divergent__"  
[17] "energy__"     

Posterior predictive check

pp_check(m1b, plotfun = "stat") +
    xlab("Frog survival rate")

  • Posterior predictive distribution implied by regression model closely matches original data (based on mean frog survival).

Calculate loo

loo_m1b <- loo(m1b, cores = 4, save_psis = TRUE)
print(loo_m1b)

Computed from 20000 by 767 log-likelihood matrix
------
Monte Carlo SE of elpd_loo is 0.0.

All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.

Posterior analysis

tidy(m1b, conf.int = TRUE, conf.level = 0.90)

mcmc_areas(m1b, area_method = "equal height", prob = 0.90) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Model with standardized predictors only",
          "Posterior distributions with medians & 90% uncertainty intervals")


mean(bayes_R2(m1b))
[1] 0.2728229
  • As expected, using standardized predictors allows easy comparison across predictors. Important predictors are unchanged.
  • Based on 90% uncertainty intervals, important predictors are snow_t1, elevation, first, donor70567, and donor72996.

Compare models

loo_compare(loo_m1a, loo_m1b)
    elpd_diff se_diff
m1a 0.0       0.0    
m1b 0.0       0.0    

Possible grouping structures to add to model

The dataset has three groups: site_id, translocation_id, and donor site. Donor site is not suitable as a grouping variable because there are only 3 levels (i.e., site ids), not enough to make this useful. Site_id and translocation_id contain 12 and 24 levels, respectively, making them suitable as grouping variables. Including both site_id and translocation_id as nested grouping variables (i.e., translocations nested within site ids) also seems justified. Having more levels within a group would seem to increase the power to detect effects of predictors. With fewer levels/less variability, an effect may be accounted for primarily by the grouping variable itself instead of the predictor. With only 1-4 translocations per site and only 12 site ids total, it is possible that the model with site_id as the sole grouping variable and the model with site_id and translocation_id as nested grouping variables will attribute more of the variation in frog survival to the grouping variables and less to the predictors. A priori, that would suggest that the model with translocation_id as the grouping variable might be the most useful model structure (assuming that all models have similar posterior predictive accuracy). Lacking any strong rationale for one model structure over another, I built three models and compared their posterior predictive accuracy using loo.

Model using standardized predictors and site_id grouping variable

Model specification

# Mixed model: same standardized predictors as m1b, plus a random intercept
# for recipient site (1 | site_id) to account for among-site variation.
m1c <- stan_glmer(
  surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + first_c + male_c + donor + (1 | site_id),
  data = d1,
  family = binomial,
  chains = 4, 
  iter = 5000*2,  # 10000 iterations per chain; first half is warmup
  cores = 4,
  seed = 84735)
starting worker pid=11308 on localhost:11976 at 13:35:38.500
starting worker pid=11337 on localhost:11976 at 13:35:38.651
starting worker pid=11372 on localhost:11976 at 13:35:38.790
starting worker pid=11403 on localhost:11976 at 13:35:38.937

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
Chain 1: Rejecting initial value:
Chain 1:   Log probability evaluates to log(0), i.e. negative infinity.
Chain 1:   Stan can't start sampling from this initial value.
Chain 1: 
Chain 1: Gradient evaluation took 0.000169 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.69 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.000109 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.09 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.000118 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 1.18 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
Chain 4: Rejecting initial value:
Chain 4:   Log probability evaluates to log(0), i.e. negative infinity.
Chain 4:   Stan can't start sampling from this initial value.
Chain 4: 
Chain 4: Gradient evaluation took 0.000119 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.19 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 26.8153 seconds (Warm-up)
Chain 2:                25.8212 seconds (Sampling)
Chain 2:                52.6365 seconds (Total)
Chain 2: 
Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 27.3018 seconds (Warm-up)
Chain 4:                25.8839 seconds (Sampling)
Chain 4:                53.1858 seconds (Total)
Chain 4: 
Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 26.6674 seconds (Warm-up)
Chain 3:                27.4283 seconds (Sampling)
Chain 3:                54.0958 seconds (Total)
Chain 3: 
Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 27.1176 seconds (Warm-up)
Chain 1:                30.398 seconds (Sampling)
Chain 1:                57.5155 seconds (Total)
Chain 1: 

Priors & chain diagnostics

prior_summary(m1c)
Priors for model 'm1c' 
------
Intercept (after predictors centered)
 ~ normal(location = 0, scale = 2.5)

Coefficients
  Specified prior:
    ~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
  Adjusted prior:
    ~ normal(location = [0,0,0,...], scale = [5.00,5.00,5.00,...])

Covariance
 ~ decov(reg. = 1, conc. = 1, shape = 1, scale = 1)
------
See help('prior_summary.stanreg') for more details
# Fixed-effect parameter names, used to restrict plots to the population-level
# terms (excludes the group-level b[(Intercept) site_id:*] terms).
# NOTE(review): "first_c0.353" and "male_c0.548" are data-dependent factor-level
# labels created by centering then converting to factor — they will change if
# the data change; confirm against get_variables(m1c) before reuse.
b1 <- c("(Intercept)", "length_z", "snowt_z", "snowt1_z", "day_z", "bdload_z", "elev_z", "first_c0.353", "male_c0.548", "donor70567", "donor72996")

mcmc_trace(m1c, size = 0.1, pars = b1)

mcmc_dens_overlay(m1c, pars = b1)


summary(m1c)

Model Info:
 function:     stan_glmer
 family:       binomial [logit]
 formula:      surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + 
       first_c + male_c + donor + (1 | site_id)
 algorithm:    sampling
 sample:       20000 (posterior sample size)
 priors:       see help('prior_summary')
 observations: 767
 groups:       site_id (12)

Estimates:
                                         mean   sd   10%   50%   90%
(Intercept)                             0.0    1.2 -1.5   0.0   1.3 
length_z                                0.0    0.2 -0.3   0.0   0.3 
snowt_z                                 0.0    0.4 -0.5   0.0   0.5 
snowt1_z                                0.0    0.5 -0.7  -0.1   0.7 
day_z                                   0.2    0.4 -0.4   0.2   0.7 
bdload_z                               -0.2    0.2 -0.5  -0.2   0.1 
elev_z                                  1.7    1.0  0.5   1.7   3.0 
first_c0.353                            0.3    0.2  0.0   0.3   0.7 
male_c0.548                             0.4    0.2  0.1   0.3   0.6 
donor70567                             -0.8    1.7 -2.8  -0.8   1.3 
donor72996                             -1.9    1.3 -3.4  -1.9  -0.3 
b[(Intercept) site_id:70134]           -0.4    0.6 -1.2  -0.4   0.3 
b[(Intercept) site_id:70370]           -1.7    1.0 -2.9  -1.6  -0.5 
b[(Intercept) site_id:70413]            0.0    1.3 -1.6   0.0   1.5 
b[(Intercept) site_id:70414]           -1.2    1.2 -2.7  -1.0   0.2 
b[(Intercept) site_id:70449]            1.5    0.6  0.8   1.5   2.3 
b[(Intercept) site_id:70505]            0.2    0.6 -0.5   0.2   1.0 
b[(Intercept) site_id:70550]            0.6    0.6 -0.2   0.5   1.4 
b[(Intercept) site_id:70556]           -0.9    1.0 -2.2  -0.9   0.4 
b[(Intercept) site_id:70619]           -0.6    0.7 -1.5  -0.5   0.3 
b[(Intercept) site_id:70628]            1.1    0.8  0.2   1.1   2.1 
b[(Intercept) site_id:70641]            0.0    0.8 -0.9   0.0   1.0 
b[(Intercept) site_id:74976]            1.0    1.0 -0.2   0.9   2.3 
Sigma[site_id:(Intercept),(Intercept)]  1.9    1.4  0.6   1.5   3.5 

Fit Diagnostics:
           mean   sd   10%   50%   90%
mean_PPD 0.3    0.0  0.3   0.3   0.3  

The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).

MCMC diagnostics
                                       mcse Rhat n_eff
(Intercept)                            0.0  1.0   9604
length_z                               0.0  1.0  29640
snowt_z                                0.0  1.0  12471
snowt1_z                               0.0  1.0   9663
day_z                                  0.0  1.0   9895
bdload_z                               0.0  1.0  29229
elev_z                                 0.0  1.0   8370
first_c0.353                           0.0  1.0  25887
male_c0.548                            0.0  1.0  30649
donor70567                             0.0  1.0  12665
donor72996                             0.0  1.0   9205
b[(Intercept) site_id:70134]           0.0  1.0  12243
b[(Intercept) site_id:70370]           0.0  1.0   9052
b[(Intercept) site_id:70413]           0.0  1.0  16783
b[(Intercept) site_id:70414]           0.0  1.0  13633
b[(Intercept) site_id:70449]           0.0  1.0   9662
b[(Intercept) site_id:70505]           0.0  1.0  10089
b[(Intercept) site_id:70550]           0.0  1.0   9125
b[(Intercept) site_id:70556]           0.0  1.0  13480
b[(Intercept) site_id:70619]           0.0  1.0   9114
b[(Intercept) site_id:70628]           0.0  1.0   9365
b[(Intercept) site_id:70641]           0.0  1.0   8782
b[(Intercept) site_id:74976]           0.0  1.0  10720
Sigma[site_id:(Intercept),(Intercept)] 0.0  1.0   7013
mean_PPD                               0.0  1.0  18973
log-posterior                          0.1  1.0   5668

For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

Posterior predictive check

# Posterior predictive check: compare the observed survival rate to the
# distribution of rates in datasets replicated from the posterior
pp_check(m1c, plotfun = "stat") +
  labs(x = "Frog survival rate")

Calculate loo

# PSIS-LOO cross-validation; keep the PSIS object for later reuse
loo_m1c <- loo(m1c, save_psis = TRUE, cores = 4)
print(loo_m1c)

Computed from 20000 by 767 log-likelihood matrix
------
Monte Carlo SE of elpd_loo is 0.0.

All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.

Posterior analysis

tidy(m1c, conf.int = TRUE, conf.level = 0.90)

# Posterior densities of the fixed effects (parameter names in b1, defined earlier)
mcmc_areas(m1c, pars = b1, prob = 0.90, area_method = "equal height") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", lwd = 0.5) +
  ggtitle(
    "Model with standardized predictors & site_id as grouping variable",
    "Posterior distributions with medians & 90% uncertainty intervals"
  )


mean(bayes_R2(m1c))
[1] 0.3169546
# Ridgeline plot of the varying intercepts (log-odds scale): survival by recipient site
mcmc_areas_ridges(m1c, regex_pars = "site_id") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", lwd = 0.5) +
  ggtitle("Frog survival for each recipient site_id")

  • Based on 90% uncertainty intervals, important predictors reduced to elevation and male.

Model using standardized predictors and translocation_id grouping variable

Model specification

# Binomial (logit) GLMM for frog survival: standardized predictors plus a
# varying intercept for each translocation event
m1d <- stan_glmer(
  surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + first_c + male_c + donor + (1 | translocation_id),
  data = d1,
  family = binomial,
  seed = 84735,
  chains = 4,
  cores = 4,
  iter = 10000  # per chain; rstanarm uses the first half as warmup
)
starting worker pid=11797 on localhost:11976 at 13:37:04.696
starting worker pid=11826 on localhost:11976 at 13:37:04.820
starting worker pid=11855 on localhost:11976 at 13:37:04.938
starting worker pid=11884 on localhost:11976 at 13:37:05.060

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
Chain 1: Rejecting initial value:
Chain 1:   Log probability evaluates to log(0), i.e. negative infinity.
Chain 1:   Stan can't start sampling from this initial value.
Chain 1: 
Chain 1: Gradient evaluation took 0.000111 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.11 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.000108 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.08 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.00012 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 1.2 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 0.000115 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.15 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 17.9894 seconds (Warm-up)
Chain 1:                16.7475 seconds (Sampling)
Chain 1:                34.7369 seconds (Total)
Chain 1: 
Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 17.1323 seconds (Warm-up)
Chain 2:                17.7856 seconds (Sampling)
Chain 2:                34.9179 seconds (Total)
Chain 2: 
Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 17.7312 seconds (Warm-up)
Chain 3:                17.3252 seconds (Sampling)
Chain 3:                35.0564 seconds (Total)
Chain 3: 
Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 17.6394 seconds (Warm-up)
Chain 4:                17.244 seconds (Sampling)
Chain 4:                34.8834 seconds (Total)
Chain 4: 

Priors & chain diagnostics

prior_summary(m1d)
Priors for model 'm1d' 
------
Intercept (after predictors centered)
 ~ normal(location = 0, scale = 2.5)

Coefficients
  Specified prior:
    ~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
  Adjusted prior:
    ~ normal(location = [0,0,0,...], scale = [5.00,5.00,5.00,...])

Covariance
 ~ decov(reg. = 1, conc. = 1, shape = 1, scale = 1)
------
See help('prior_summary.stanreg') for more details
# Fixed-effect parameter names used for the trace/density diagnostic plots
b1 <- c(
  "(Intercept)", "length_z", "snowt_z", "snowt1_z", "day_z", "bdload_z",
  "elev_z", "first_c0.353", "male_c0.548", "donor70567", "donor72996"
)

# Trace plots and per-chain density overlays to assess mixing/convergence
mcmc_trace(m1d, pars = b1, size = 0.1)

mcmc_dens_overlay(m1d, pars = b1)


summary(m1d)

Model Info:
 function:     stan_glmer
 family:       binomial [logit]
 formula:      surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + 
       first_c + male_c + donor + (1 | translocation_id)
 algorithm:    sampling
 sample:       20000 (posterior sample size)
 priors:       see help('prior_summary')
 observations: 767
 groups:       translocation_id (24)

Estimates:
                                                  mean   sd   10%   50%   90%
(Intercept)                                      0.3    0.7 -0.6   0.3   1.2 
length_z                                         0.0    0.2 -0.3   0.0   0.3 
snowt_z                                          0.0    0.6 -0.7   0.0   0.7 
snowt1_z                                        -1.2    0.5 -1.9  -1.2  -0.5 
day_z                                            0.1    0.5 -0.6   0.1   0.8 
bdload_z                                        -0.2    0.2 -0.5  -0.2   0.0 
elev_z                                           1.6    0.6  0.8   1.6   2.4 
first_c0.353                                     0.5    0.5 -0.1   0.5   1.1 
male_c0.548                                      0.3    0.2  0.1   0.3   0.6 
donor70567                                      -1.3    0.9 -2.4  -1.3  -0.2 
donor72996                                      -2.2    0.8 -3.2  -2.2  -1.3 
b[(Intercept) translocation_id:70134_1]         -0.6    0.5 -1.3  -0.6   0.1 
b[(Intercept) translocation_id:70370_1]         -0.6    0.8 -1.6  -0.5   0.4 
b[(Intercept) translocation_id:70370_2]          0.0    0.7 -0.9   0.0   0.9 
b[(Intercept) translocation_id:70413_1]         -0.1    0.7 -1.0  -0.1   0.8 
b[(Intercept) translocation_id:70413_2]          0.4    0.7 -0.5   0.4   1.3 
b[(Intercept) translocation_id:70413_3]         -0.3    0.8 -1.3  -0.3   0.6 
b[(Intercept) translocation_id:70414_1]         -0.9    0.8 -2.0  -0.9   0.1 
b[(Intercept) translocation_id:70449_1]          0.1    0.6 -0.7   0.1   0.9 
b[(Intercept) translocation_id:70449_2]          1.6    0.6  0.9   1.6   2.4 
b[(Intercept) translocation_id:70505_1]          0.1    0.5 -0.6   0.1   0.8 
b[(Intercept) translocation_id:70505_2]          0.1    0.6 -0.7   0.1   0.9 
b[(Intercept) translocation_id:70505_3]          0.2    0.7 -0.7   0.2   1.0 
b[(Intercept) translocation_id:70505_4]         -0.1    0.6 -0.9  -0.1   0.7 
b[(Intercept) translocation_id:70550_1]          0.5    0.5 -0.2   0.5   1.2 
b[(Intercept) translocation_id:70550_2]         -0.1    0.6 -0.9  -0.1   0.6 
b[(Intercept) translocation_id:70556_1]         -0.5    0.7 -1.4  -0.5   0.4 
b[(Intercept) translocation_id:70556_2]         -0.8    0.7 -1.8  -0.8   0.0 
b[(Intercept) translocation_id:70619_1]         -0.5    0.6 -1.2  -0.5   0.2 
b[(Intercept) translocation_id:70628_1]          0.8    0.6  0.0   0.8   1.6 
b[(Intercept) translocation_id:70641_1]          0.2    0.7 -0.7   0.2   1.1 
b[(Intercept) translocation_id:70641_2]         -0.3    0.7 -1.1  -0.2   0.6 
b[(Intercept) translocation_id:70641_3]         -0.7    0.7 -1.7  -0.7   0.2 
b[(Intercept) translocation_id:74976_1]          1.4    0.8  0.4   1.3   2.4 
b[(Intercept) translocation_id:74976_2]          0.0    0.7 -0.8   0.0   0.9 
Sigma[translocation_id:(Intercept),(Intercept)]  0.9    0.5  0.4   0.8   1.6 

Fit Diagnostics:
           mean   sd   10%   50%   90%
mean_PPD 0.3    0.0  0.3   0.3   0.3  

The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).

MCMC diagnostics
                                                mcse Rhat n_eff
(Intercept)                                     0.0  1.0  10818
length_z                                        0.0  1.0  25683
snowt_z                                         0.0  1.0  10328
snowt1_z                                        0.0  1.0  12115
day_z                                           0.0  1.0  10672
bdload_z                                        0.0  1.0  28564
elev_z                                          0.0  1.0  11059
first_c0.353                                    0.0  1.0  11342
male_c0.548                                     0.0  1.0  26534
donor70567                                      0.0  1.0  10846
donor72996                                      0.0  1.0  10753
b[(Intercept) translocation_id:70134_1]         0.0  1.0  14706
b[(Intercept) translocation_id:70370_1]         0.0  1.0  15110
b[(Intercept) translocation_id:70370_2]         0.0  1.0  17141
b[(Intercept) translocation_id:70413_1]         0.0  1.0  13738
b[(Intercept) translocation_id:70413_2]         0.0  1.0  14123
b[(Intercept) translocation_id:70413_3]         0.0  1.0  16112
b[(Intercept) translocation_id:70414_1]         0.0  1.0  17969
b[(Intercept) translocation_id:70449_1]         0.0  1.0  12532
b[(Intercept) translocation_id:70449_2]         0.0  1.0  13621
b[(Intercept) translocation_id:70505_1]         0.0  1.0  14823
b[(Intercept) translocation_id:70505_2]         0.0  1.0  18847
b[(Intercept) translocation_id:70505_3]         0.0  1.0  20672
b[(Intercept) translocation_id:70505_4]         0.0  1.0  17878
b[(Intercept) translocation_id:70550_1]         0.0  1.0  11544
b[(Intercept) translocation_id:70550_2]         0.0  1.0  15234
b[(Intercept) translocation_id:70556_1]         0.0  1.0  14110
b[(Intercept) translocation_id:70556_2]         0.0  1.0  14298
b[(Intercept) translocation_id:70619_1]         0.0  1.0  12428
b[(Intercept) translocation_id:70628_1]         0.0  1.0  11195
b[(Intercept) translocation_id:70641_1]         0.0  1.0  11049
b[(Intercept) translocation_id:70641_2]         0.0  1.0  18120
b[(Intercept) translocation_id:70641_3]         0.0  1.0  16196
b[(Intercept) translocation_id:74976_1]         0.0  1.0  14344
b[(Intercept) translocation_id:74976_2]         0.0  1.0  14465
Sigma[translocation_id:(Intercept),(Intercept)] 0.0  1.0   8036
mean_PPD                                        0.0  1.0  19612
log-posterior                                   0.1  1.0   5727

For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

Posterior predictive check

# Posterior predictive check of the observed vs replicated survival rate
pp_check(m1d, plotfun = "stat") +
  labs(x = "Frog survival rate")

Calculate loo

# PSIS-LOO cross-validation; keep the PSIS object for later reuse
loo_m1d <- loo(m1d, save_psis = TRUE, cores = 4)
print(loo_m1d)

Computed from 20000 by 767 log-likelihood matrix
------
Monte Carlo SE of elpd_loo is 0.0.

All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.

Posterior analysis

tidy(m1d, conf.int = TRUE, conf.level = 0.90)

# Posterior densities of the fixed effects for the translocation_id model
mcmc_areas(m1d, pars = b1, prob = 0.90, area_method = "equal height") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", lwd = 0.5) +
  ggtitle(
    "Model with standardized predictors & translocation_id as grouping variable",
    "Posterior distributions with medians & 90% uncertainty intervals"
  )


mean(bayes_R2(m1d))
[1] 0.322301
# Ridgeline plot of the varying intercepts (log-odds scale): survival by translocation
mcmc_areas_ridges(m1d, regex_pars = "translocation_id") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", lwd = 0.5) +
  ggtitle("Frog survival for each translocation_id")

  • Based on loo (see loo_compare below), the models with translocation_id and site_id grouping variables fit very similarly; the elpd difference is well within its standard error, so neither grouping structure is clearly preferred.
  • Based on 90% uncertainty intervals, important predictors are similar to those in model without grouping variable: snowt1_z, elev_z, male_c0.548, donor72996.

Model using standardized predictors and 2 grouping variables: translocation_id nested within site_id

Model specification

# Binomial (logit) GLMM with two varying intercepts: translocation_id nested
# within site_id (crossed syntax; nesting is implied by the coded IDs)
m1e <- stan_glmer(
  surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + first_c + male_c + donor + (1 | translocation_id) + (1 | site_id),
  data = d1,
  family = binomial,
  seed = 84735,
  chains = 4,
  cores = 4,
  iter = 10000  # per chain; rstanarm uses the first half as warmup
)
starting worker pid=12308 on localhost:11976 at 13:38:14.420
starting worker pid=12337 on localhost:11976 at 13:38:14.541
starting worker pid=12366 on localhost:11976 at 13:38:14.669
starting worker pid=12395 on localhost:11976 at 13:38:14.798

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000122 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.22 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.000117 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.17 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.00015 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 1.5 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 0.000148 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.48 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 33.7892 seconds (Warm-up)
Chain 2:                29.5968 seconds (Sampling)
Chain 2:                63.386 seconds (Total)
Chain 2: 
Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 33.485 seconds (Warm-up)
Chain 3:                29.5418 seconds (Sampling)
Chain 3:                63.0268 seconds (Total)
Chain 3: 
Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 32.3165 seconds (Warm-up)
Chain 1:                44.1083 seconds (Sampling)
Chain 1:                76.4248 seconds (Total)
Chain 1: 
Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 35.4405 seconds (Warm-up)
Chain 4:                44.1243 seconds (Sampling)
Chain 4:                79.5647 seconds (Total)
Chain 4: 

Priors & chain diagnostics

prior_summary(m1e)
Priors for model 'm1e' 
------
Intercept (after predictors centered)
 ~ normal(location = 0, scale = 2.5)

Coefficients
  Specified prior:
    ~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
  Adjusted prior:
    ~ normal(location = [0,0,0,...], scale = [5.00,5.00,5.00,...])

Covariance
 ~ decov(reg. = 1, conc. = 1, shape = 1, scale = 1)
------
See help('prior_summary.stanreg') for more details
# Fixed-effect parameter names used for the trace/density diagnostic plots
b1 <- c(
  "(Intercept)", "length_z", "snowt_z", "snowt1_z", "day_z", "bdload_z",
  "elev_z", "first_c0.353", "male_c0.548", "donor70567", "donor72996"
)

# Trace plots and per-chain density overlays to assess mixing/convergence
mcmc_trace(m1e, pars = b1, size = 0.1)

mcmc_dens_overlay(m1e, pars = b1)


summary(m1e)

Model Info:
 function:     stan_glmer
 family:       binomial [logit]
 formula:      surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + 
       first_c + male_c + donor + (1 | translocation_id) + (1 | 
       site_id)
 algorithm:    sampling
 sample:       20000 (posterior sample size)
 priors:       see help('prior_summary')
 observations: 767
 groups:       translocation_id (24), site_id (12)

Estimates:
                                                  mean   sd   10%   50%   90%
(Intercept)                                      0.1    1.0 -1.2   0.1   1.3 
length_z                                         0.0    0.2 -0.3   0.0   0.3 
snowt_z                                         -0.1    0.5 -0.7  -0.1   0.6 
snowt1_z                                        -0.5    0.7 -1.5  -0.6   0.4 
day_z                                            0.2    0.5 -0.5   0.2   0.9 
bdload_z                                        -0.2    0.2 -0.5  -0.2   0.0 
elev_z                                           1.7    0.9  0.6   1.7   2.9 
first_c0.353                                     0.4    0.4  0.0   0.4   0.9 
male_c0.548                                      0.3    0.2  0.1   0.3   0.6 
donor70567                                      -1.0    1.5 -2.7  -1.1   0.8 
donor72996                                      -2.0    1.1 -3.4  -2.1  -0.7 
b[(Intercept) translocation_id:70134_1]         -0.2    0.5 -0.9  -0.1   0.4 
b[(Intercept) translocation_id:70370_1]         -0.4    0.6 -1.1  -0.3   0.2 
b[(Intercept) translocation_id:70370_2]          0.1    0.5 -0.5   0.1   0.7 
b[(Intercept) translocation_id:70413_1]          0.0    0.5 -0.6   0.0   0.6 
b[(Intercept) translocation_id:70413_2]          0.2    0.5 -0.4   0.1   0.9 
b[(Intercept) translocation_id:70413_3]         -0.2    0.6 -0.9  -0.2   0.4 
b[(Intercept) translocation_id:70414_1]         -0.3    0.7 -1.2  -0.2   0.3 
b[(Intercept) translocation_id:70449_1]         -0.1    0.5 -0.7  -0.1   0.5 
b[(Intercept) translocation_id:70449_2]          0.7    0.7  0.0   0.6   1.7 
b[(Intercept) translocation_id:70505_1]          0.0    0.5 -0.5   0.0   0.6 
b[(Intercept) translocation_id:70505_2]          0.1    0.5 -0.5   0.1   0.7 
b[(Intercept) translocation_id:70505_3]          0.1    0.5 -0.5   0.0   0.7 
b[(Intercept) translocation_id:70505_4]         -0.1    0.5 -0.7  -0.1   0.5 
b[(Intercept) translocation_id:70550_1]          0.3    0.5 -0.3   0.2   0.9 
b[(Intercept) translocation_id:70550_2]         -0.1    0.5 -0.7  -0.1   0.4 
b[(Intercept) translocation_id:70556_1]         -0.2    0.6 -0.9  -0.1   0.4 
b[(Intercept) translocation_id:70556_2]         -0.2    0.6 -1.0  -0.1   0.4 
b[(Intercept) translocation_id:70619_1]         -0.2    0.5 -0.9  -0.1   0.4 
b[(Intercept) translocation_id:70628_1]          0.3    0.6 -0.3   0.2   1.1 
b[(Intercept) translocation_id:70641_1]          0.2    0.5 -0.4   0.1   0.9 
b[(Intercept) translocation_id:70641_2]         -0.1    0.5 -0.7   0.0   0.5 
b[(Intercept) translocation_id:70641_3]         -0.3    0.6 -1.1  -0.2   0.2 
b[(Intercept) translocation_id:74976_1]          0.5    0.7 -0.1   0.4   1.5 
b[(Intercept) translocation_id:74976_2]         -0.1    0.5 -0.7  -0.1   0.5 
b[(Intercept) site_id:70134]                    -0.3    0.6 -1.1  -0.3   0.4 
b[(Intercept) site_id:70370]                    -0.9    1.0 -2.3  -0.7   0.2 
b[(Intercept) site_id:70413]                     0.0    1.1 -1.3   0.0   1.2 
b[(Intercept) site_id:70414]                    -0.7    1.1 -2.1  -0.5   0.3 
b[(Intercept) site_id:70449]                     1.0    0.8  0.0   1.0   2.0 
b[(Intercept) site_id:70505]                     0.2    0.6 -0.5   0.1   0.9 
b[(Intercept) site_id:70550]                     0.3    0.6 -0.4   0.2   1.1 
b[(Intercept) site_id:70556]                    -0.7    0.9 -1.8  -0.5   0.3 
b[(Intercept) site_id:70619]                    -0.4    0.7 -1.3  -0.3   0.4 
b[(Intercept) site_id:70628]                     0.6    0.8 -0.2   0.5   1.7 
b[(Intercept) site_id:70641]                    -0.1    0.7 -1.0  -0.1   0.7 
b[(Intercept) site_id:74976]                     0.7    0.9 -0.2   0.6   1.9 
Sigma[translocation_id:(Intercept),(Intercept)]  0.4    0.4  0.0   0.3   0.9 
Sigma[site_id:(Intercept),(Intercept)]           1.2    1.3  0.1   0.8   2.6 

Fit Diagnostics:
           mean   sd   10%   50%   90%
mean_PPD 0.3    0.0  0.3   0.3   0.3  

The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).

MCMC diagnostics
                                                mcse Rhat n_eff
(Intercept)                                     0.0  1.0   8760
length_z                                        0.0  1.0  26927
snowt_z                                         0.0  1.0  12055
snowt1_z                                        0.0  1.0   5542
day_z                                           0.0  1.0  10670
bdload_z                                        0.0  1.0  28589
elev_z                                          0.0  1.0  10334
first_c0.353                                    0.0  1.0  12870
male_c0.548                                     0.0  1.0  27066
donor70567                                      0.0  1.0   9522
donor72996                                      0.0  1.0   8564
b[(Intercept) translocation_id:70134_1]         0.0  1.0  10178
b[(Intercept) translocation_id:70370_1]         0.0  1.0  13784
b[(Intercept) translocation_id:70370_2]         0.0  1.0  21315
b[(Intercept) translocation_id:70413_1]         0.0  1.0  18159
b[(Intercept) translocation_id:70413_2]         0.0  1.0  16405
b[(Intercept) translocation_id:70413_3]         0.0  1.0  18489
b[(Intercept) translocation_id:70414_1]         0.0  1.0   8310
b[(Intercept) translocation_id:70449_1]         0.0  1.0  15651
b[(Intercept) translocation_id:70449_2]         0.0  1.0   4203
b[(Intercept) translocation_id:70505_1]         0.0  1.0  18690
b[(Intercept) translocation_id:70505_2]         0.0  1.0  23587
b[(Intercept) translocation_id:70505_3]         0.0  1.0  23491
b[(Intercept) translocation_id:70505_4]         0.0  1.0  22403
b[(Intercept) translocation_id:70550_1]         0.0  1.0  12418
b[(Intercept) translocation_id:70550_2]         0.0  1.0  19758
b[(Intercept) translocation_id:70556_1]         0.0  1.0  14624
b[(Intercept) translocation_id:70556_2]         0.0  1.0   7455
b[(Intercept) translocation_id:70619_1]         0.0  1.0  11194
b[(Intercept) translocation_id:70628_1]         0.0  1.0   8502
b[(Intercept) translocation_id:70641_1]         0.0  1.0  15452
b[(Intercept) translocation_id:70641_2]         0.0  1.0  20501
b[(Intercept) translocation_id:70641_3]         0.0  1.0   9364
b[(Intercept) translocation_id:74976_1]         0.0  1.0   5537
b[(Intercept) translocation_id:74976_2]         0.0  1.0  20547
b[(Intercept) site_id:70134]                    0.0  1.0  12998
b[(Intercept) site_id:70370]                    0.0  1.0   5422
b[(Intercept) site_id:70413]                    0.0  1.0  14150
b[(Intercept) site_id:70414]                    0.0  1.0   9926
b[(Intercept) site_id:70449]                    0.0  1.0   4422
b[(Intercept) site_id:70505]                    0.0  1.0  10980
b[(Intercept) site_id:70550]                    0.0  1.0   9459
b[(Intercept) site_id:70556]                    0.0  1.0  10122
b[(Intercept) site_id:70619]                    0.0  1.0   9424
b[(Intercept) site_id:70628]                    0.0  1.0   6708
b[(Intercept) site_id:70641]                    0.0  1.0   9372
b[(Intercept) site_id:74976]                    0.0  1.0   7086
Sigma[translocation_id:(Intercept),(Intercept)] 0.0  1.0   4082
Sigma[site_id:(Intercept),(Intercept)]          0.0  1.0   4867
mean_PPD                                        0.0  1.0  19556
log-posterior                                   0.1  1.0   5917

For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

Posterior predictive check

# Posterior predictive check of the observed vs replicated survival rate
pp_check(m1e, plotfun = "stat") +
  labs(x = "Frog survival rate")

Calculate loo

# PSIS-LOO cross-validation; keep the PSIS object for later reuse
loo_m1e <- loo(m1e, save_psis = TRUE, cores = 4)
print(loo_m1e)

Computed from 20000 by 767 log-likelihood matrix
------
Monte Carlo SE of elpd_loo is 0.0.

All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.

Posterior analysis

tidy(m1e, conf.int = TRUE, conf.level = 0.90)

# Posterior densities of the fixed effects for the nested model.
# Fix: the title previously said "translocation_id as grouping variable"
# (copy-pasted from the m1d section); m1e has translocation_id nested within site_id.
mcmc_areas(m1e, area_method = "equal height", prob = 0.90, pars = b1) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Model with standardized predictors & translocation_id nested within site_id",
          "Posterior distributions with medians & 90% uncertainty intervals")


mean(bayes_R2(m1e))
[1] 0.3232688
# Frog survival by translocation_id (varying intercepts, log-odds scale).
# Idiom: dropped the redundant parentheses around the regex_pars value to
# match the equivalent calls elsewhere in this notebook.
mcmc_areas_ridges(m1e, regex_pars = "translocation_id") +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Frog survival for each translocation_id")


# Frog survival by site_id (varying intercepts, log-odds scale).
# Fix: the title previously said "translocation_id" — a copy-paste error from
# the preceding plot; this plot shows the site_id intercepts. Also dropped the
# redundant parentheses around the regex_pars value.
mcmc_areas_ridges(m1e, regex_pars = "site_id") +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Frog survival for each recipient site_id")

Compare 4 models with different grouping structures

loo_compare(loo_m1b, loo_m1c, loo_m1d, loo_m1e)
    elpd_diff se_diff
m1c   0.0       0.0  
m1e  -0.5       1.9  
m1d  -1.1       3.1  
m1b -18.2       6.3  

Additional analysis of model m1d

Posterior classification accuracy

# Posterior predictive models for each frog in dataset
# posterior_predict() returns a draws x observations matrix of simulated 0/1 outcomes
set.seed(84735)
survival_predict_1 <- posterior_predict(m1d, newdata = d1)

# Classify each frog: column means of the predictive draws give the posterior
# survival probability; classify as surviving at a 0.5 cutoff
survival_predict_2 <- d1 %>%
  mutate(
    survival_prob = colMeans(survival_predict_1),
    survival_class = as.numeric(survival_prob >= 0.5)
  ) %>%
  select(
    site_id, translocation_id, year, snowt1_z, elev_z, male_c, donor,
    surv, survival_prob, survival_class
  )

# Generate confusion matrix and calculate accuracy
# tabyl()/adorn_totals() are from the janitor package — TODO confirm loaded
# Rows = observed survival (surv), columns = predicted class (survival_class)
survival_predict_2 %>% 
  tabyl(surv, survival_class) %>% 
  adorn_totals(c("row", "col"))
  surv   0   1 Total
     0 471  62   533
     1  96 138   234
 Total 567 200   767
(471 + 138)/767 # overall_accuracy 
[1] 0.7940026
471/533 # true negative rate
[1] 0.8836773
138/234 # true positive rate
[1] 0.5897436
# Generate confusion matrix using function
# classification_summary() is from the bayesrules package — TODO confirm loaded.
# NOTE(review): the rendered output shows $accuracy_rates as NA — verify the
# package version; the hand-computed rates above are the authoritative values.
set.seed(84735)
classification_summary(m1d, data = d1, cutoff = 0.5)
$confusion_matrix
 y   0   1
 0 471  62
 1  96 138

$accuracy_rates
NA
  • Sensitivity or “true positive rate” measures the proportion of Y = 1 observations that are accurately classified.
  • Specificity or “true negative rate” measures the proportion of Y = 0 observations that are accurately classified (i.e., proportion of frogs correctly identified as not surviving).

Create reduced model containing only important predictors for subsequent analyses

  • A reduced model is helpful because it limits the predictor values needed to generate posterior predictions, simplifying the subsequent predictive analyses.
# Reduced binomial GLMM retaining only the predictors flagged as important
# (snowt1_z, elev_z, male_c, donor) plus the translocation_id varying intercept
m1d_reduce <- stan_glmer(
  surv ~ snowt1_z + elev_z + male_c + donor + (1 | translocation_id),
  data = d1,
  family = binomial,
  seed = 84735,
  chains = 4,
  cores = 4,
  iter = 10000  # per chain; rstanarm uses the first half as warmup
)
starting worker pid=12738 on localhost:11976 at 13:40:15.905
starting worker pid=12767 on localhost:11976 at 13:40:16.051
starting worker pid=12796 on localhost:11976 at 13:40:16.183
starting worker pid=12825 on localhost:11976 at 13:40:16.313

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000102 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.02 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.000101 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.01 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.000108 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 1.08 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 0.000139 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.39 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 12.8679 seconds (Warm-up)
Chain 2:                11.9961 seconds (Sampling)
Chain 2:                24.864 seconds (Total)
Chain 2: 
Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 12.7424 seconds (Warm-up)
Chain 1:                12.4851 seconds (Sampling)
Chain 1:                25.2275 seconds (Total)
Chain 1: 
Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 12.7796 seconds (Warm-up)
Chain 3:                12.5353 seconds (Sampling)
Chain 3:                25.3149 seconds (Total)
Chain 3: 
Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 13.0358 seconds (Warm-up)
Chain 4:                12.7509 seconds (Sampling)
Chain 4:                25.7867 seconds (Total)
Chain 4: 

Confirm fit of reduced model is comparable to full model

loo_m1d_reduce <- loo(m1d_reduce, cores = 4, save_psis = TRUE)
print(loo_m1d_reduce)

Computed from 20000 by 767 log-likelihood matrix
------
Monte Carlo SE of elpd_loo is 0.0.

All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
loo_compare(loo_m1d, loo_m1d_reduce)
    elpd_diff se_diff
m1d 0.0       0.0    
m1d 0.0       0.0    

Simulate and plot posterior predictive models & observations

m1d_group_means <- d1 %>% 
  group_by(translocation_id) %>% 
  summarize(count = n(), psurv = mean(surv), elev_z = mean(elev_z), snowt1_z = mean(snowt1_z)) %>% 
  mutate(
    male_c = as.factor(0.548), 
    donor = case_when(
      str_detect(translocation_id, "74976") | str_detect(translocation_id, "70556") ~ "70459",
      str_detect(translocation_id, "70413") ~ "70567",
      TRUE ~ "72996")) %>% 
  arrange(psurv)  # to arrange plot values/labels by psurv

predictions_m1d <- posterior_predict(m1d_reduce, newdata = m1d_group_means)

ppc_intervals(
  y = m1d_group_means$psurv,
  yrep = predictions_m1d, 
  prob = 0.5, 
  prob_outer = 0.8) +
ggplot2::scale_x_continuous(
  labels = m1d_group_means$translocation_id,
  breaks = 1:nrow(m1d_group_means)) +
  xaxis_text(angle = 90, vjust = 0.5) +
xlab("Translocation_id")

  • Uncertainty intervals appear incorrect for binomial model, but not sure how to adjust.
  • Based on mean values, model does an adequate job of predicting survival based on group-level and individual-level characteristics.
  • What explains the low predicted survival at 70370 (both translocations), and high versus low predicted survival at 70550 for 1st and 2nd translocations, respectively? Both sites seem to contain high quality habitat (i.e., high elevation).
  • The predicted survival is based on assumption that only males are included. Run another set of models for females?

Plot posterior plausible relationships between each predictor and the probability of frog survival (conditional effects)

  • As described in https://rdrr.io/cran/brms/man/conditional_effects.brmsfit.html, “When creating conditional_effects for a particular predictor (or interaction of two predictors), one has to choose the values of all other predictors to condition on. By default, the mean is used for continuous variables and the reference category is used for factors, but you may change these values via argument conditions. This also has an implication for the points argument: In the created plots, only those points will be shown that correspond to the factor levels actually used in the conditioning, in order not to create the false impression of bad model fit, where it is just due to conditioning on certain factor levels.”
  • Using rstanarm, I was unable to do this as I expected to: Start with data = d1 and using “newdata =” to specify that predictions should be based on data in d1_elevz (for example), in which non-plotted continuous predictors are set to the mean value. Not sure if my alternative approach is correct.
  • Approach used: Instead of starting the routine with d1, I started with the new dataset in which all variables except the one of interest were set to mean values (continuous variables) and the reference level (categorical variables). This produced plots/results that were very similar to those produced in brms using the conditional_effects function.

Effect of elevation

d1_elevz <- d1 %>% 
  mutate(snowt1_z = mean(snowt1_z), 
         male_c = recode(male_c, "-0.452" = "0.548"),
         donor = recode(donor, "70567" = "70459", "72996" = "70459"))

d1_elevz %>% distinct(snowt1_z, male_c, donor) # ensure variables were set to correct values
  
d1_elevz %>% 
  select(site_id, translocation_id, snowt1_z, elev_z, male_c, donor, surv) %>% 
  add_epred_draws(m1d_reduce, ndraws = 100, re_formula = NA) %>% 
  ggplot(aes(x = elev_z, y = surv)) +
    geom_line(aes(y = .epred, group = .draw), color = "blue", size = 0.2, alpha = 0.5) +
#    geom_point(aes(y = .epred)) +
  labs(x = "Elevation of recipient site (standardized)", y = "Probability of 1-year frog survival")



# d1 %>%  # Test of add_predicted_draws used for similar purpose - unsuccessful
#   select(site_id, translocation_id, snowt1_z, elev_z, male_c, donor, surv) %>% 
#   add_predicted_draws(m1d_reduce, ndraws = 100, re_formula = NA, newdata = d1_elevz) %>% View()
#   ggplot(aes(x = elev_z, y = surv)) +
#     geom_line(aes(y = .epred, group = .draw), color = "blue", size = 0.2, alpha = 0.5) +
#     geom_point(aes(y = .epred)) +
#   labs(x = "Elevation of recipient site (standardized)", y = "Probability of 1-year frog survival")

Effect of winter severity in year t1

d1_snowt1z <- d1 %>% 
  mutate(elev_z = mean(elev_z), 
         male_c = recode(male_c, "-0.452" = "0.548"),
         donor = recode(donor, "70567" = "70459", "72996" = "70459"))

d1_snowt1z %>% distinct(elev_z, male_c, donor) # ensure variables were set to correct values
  
d1_snowt1z %>% 
  select(site_id, translocation_id, snowt1_z, elev_z, male_c, donor, surv) %>% 
  add_epred_draws(m1d_reduce, ndraws = 100, re_formula = NA) %>% 
  ggplot(aes(x = snowt1_z, y = surv)) +
    geom_line(aes(y = .epred, group = .draw), color = "blue", size = 0.2, alpha = 0.5) +
#    geom_point(aes(y = .epred)) +
  labs(x = "Winter severity in year following translocation (standardized)", y = "Probability of 1-year frog survival") 

Effect of sex

d1_malec <- d1 %>% 
  mutate(elev_z = mean(elev_z), 
         snowt1_z = mean(snowt1_z),
         donor = recode(donor, "70567" = "70459", "72996" = "70459"))

d1_malec %>% distinct(elev_z, snowt1_z, donor) # ensure variables were set to correct values
  
d1_malec %>% 
  select(site_id, translocation_id, snowt1_z, elev_z, male_c, donor, surv) %>% 
  add_epred_draws(m1d_reduce, ndraws = 100, re_formula = NA) %>% 
  ggplot(aes(x = male_c, y = surv)) +
    geom_boxplot(aes(y = .epred, x = male_c)) +
#    geom_point(aes(y = .epred)) +
    labs(x = "Sex (centered)", y = "Probability of 1-year frog survival")

Effect of donor

d1_donor <- d1 %>% 
  mutate(elev_z = mean(elev_z), 
         snowt1_z = mean(snowt1_z),
         male_c = recode(male_c, "-0.452" = "0.548"))

d1_donor %>% distinct(elev_z, snowt1_z, male_c) # ensure variables were set to correct values
  
d1_donor %>% 
  select(site_id, translocation_id, snowt1_z, elev_z, male_c, donor, surv) %>% 
  add_epred_draws(m1d_reduce, ndraws = 100, re_formula = NA) %>% 
  ggplot(aes(x = donor, y = surv)) +
    geom_boxplot(aes(y = .epred, x = donor)) +
#    geom_point(aes(y = .epred)) +
    labs(x = "Donor population", y = "Probability of 1-year frog survival")

Joint effects of elevation and sex

d1_elevz_malec <- d1 %>% 
  mutate(snowt1_z = mean(snowt1_z), 
         donor = recode(donor, "70567" = "70459", "72996" = "70459"))

d1_elevz_malec %>% distinct(snowt1_z, donor) # ensure variables were set to correct values
  
pal <- c("#0d0887", "#f89540")
d1_elevz_malec %>% 
  select(site_id, translocation_id, snowt1_z, elev_z, male_c, donor, surv) %>% 
  add_epred_draws(m1d_reduce, ndraws = 100, re_formula = NA) %>% 
  ggplot(aes(x = elev_z, y = surv, color = male_c)) +
    geom_line(aes(y = .epred, group = paste(male_c, .draw)), size = 0.4, alpha = 0.5) +
#    geom_point(aes(y = .epred)) +
    labs(x = "Elevation of recipient site (standardized)", y = "Probability of 1-year frog survival") +
    scale_color_manual(values = pal)  + 
    theme_classic()

For comparison, fit model using brms and conditional_effects function to plot variable-specific effects

Fit model

m1d_reduce_brms <- brm(
  surv ~ snowt1_z + elev_z + male_c + donor + (1 | translocation_id),
  data = d1,
  family = bernoulli(),
  chains = 4, 
  iter = 5000*2, 
  cores = 4,
  seed = 84735)
Compiling Stan program...
Start sampling
starting worker pid=13412 on localhost:11976 at 13:41:34.174
starting worker pid=13441 on localhost:11976 at 13:41:34.302
starting worker pid=13470 on localhost:11976 at 13:41:34.439
starting worker pid=13499 on localhost:11976 at 13:41:34.568

SAMPLING FOR MODEL 'dbb8efe82731517ccb01114eea2991f6' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 4.4e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.44 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)

SAMPLING FOR MODEL 'dbb8efe82731517ccb01114eea2991f6' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 4.7e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.47 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)

SAMPLING FOR MODEL 'dbb8efe82731517ccb01114eea2991f6' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 7.9e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.79 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)

SAMPLING FOR MODEL 'dbb8efe82731517ccb01114eea2991f6' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 4.9e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.49 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 3.11244 seconds (Warm-up)
Chain 1:                3.90152 seconds (Sampling)
Chain 1:                7.01396 seconds (Total)
Chain 1: 
Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 3.07934 seconds (Warm-up)
Chain 2:                4.26893 seconds (Sampling)
Chain 2:                7.34827 seconds (Total)
Chain 2: 
Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 3.08411 seconds (Warm-up)
Chain 3:                3.02089 seconds (Sampling)
Chain 3:                6.105 seconds (Total)
Chain 3: 
Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 2.96916 seconds (Warm-up)
Chain 4:                4.03496 seconds (Sampling)
Chain 4:                7.00411 seconds (Total)
Chain 4: 

Plot conditional effects

plot(conditional_effects(m1d_reduce_brms, spaghetti = TRUE, mean = TRUE, ndraws = 100), ask = FALSE)

TO DO

---
title: "Predictors of frog survival following translocations - analyses"
author: "Roland Knapp"
output: html_notebook
---

The goal of the analyses in this notebook is to identify predictors of frog survival following translocation. The analyses are conducted in a Bayesian framework using the R package rstanarm.

## Load packages

```{r load-packages}
library(tidyverse)
library(brms)
library(rstanarm)
library(bayesplot)
library(tidybayes)
library(loo)
library(broom.mixed)
library(janitor)

source("classification_summary.R")
```

## Read in frog translocation dataset  
* Read in data, create variables for analysis, standardize continuous variables. 
* arm package used to rescale continuous variables (substract mean, divide by 2 SD), manual centering of binary variables.
```{r read-translocation-data}
# Read the cleaned translocation dataset, derive analysis variables, and
# standardize continuous predictors (arm::rescale: subtract mean, divide by
# 2 SD). Binary predictors are centered manually, then converted to factors.
d1 <- read_csv(here::here("data", "clean", "frog_translocation_final.csv")) %>% 
  drop_na(bd_load) %>%  # drop records where bd_load is NA
  mutate(
    first = if_else(order == 1, 1, 0),              # first translocation at a site?
    surv = if_else(survival < 0.5, 0, 1),           # binarize 1-year survival
    male = if_else(sex == "m", 1, 0),
    elev_lo = if_else(elevation < 3020, 1, 0), # using interval defined by cut_interval, n = 2
    elev_z = arm::rescale(elevation),
    snowt_z = arm::rescale(snow_t),
    snowt1_z = arm::rescale(snow_t1),
    day_z = arm::rescale(day),
    length_z = arm::rescale(length),
    bdload_l = log10(bd_load + 1),                  # log10(x + 1) handles zero loads
    bdload_z = arm::rescale(bdload_l),
    shore_c = shore - mean(shore),
    male_c = male - mean(male),
    elevlo_c = elev_lo - mean(elev_lo),
    first_c = first - mean(first),
    # lambda form: passing extra arguments through across(..., f, args) is
    # deprecated in recent dplyr
    across(c(shore_c, male_c, elevlo_c, first_c), ~ round(.x, 3)), 
    across(c(site_id, shore_c, first_c, donor, male_c, elevlo_c, year), as.factor),
    surv = as.integer(surv))

# Add a descriptive translocation_id: site_id + sequential translocation
# number within site (ordered by year), joined back onto the full dataset
d1 <- d1 %>% 
  distinct(site_id, year) %>% 
  arrange(site_id, year) %>% 
  group_by(site_id) %>% 
  mutate(transno = seq_len(n())) %>% 
  ungroup() %>% 
  unite(translocation_id, c("site_id", "transno"), remove = FALSE) %>% 
  select(-transno) %>% 
  mutate(translocation_id = as.factor(translocation_id)) %>% 
  inner_join(d1, by = c("site_id", "year")) %>% 
  relocate(translocation_id, .after = site_id)
```

## Null values - check

```{r check-null}
d1 %>% summarize(across(everything(), ~ sum(is.na(.))))
```

## Model using non-standardized predictors
### Model specification
```{r}
# Logistic regression of 1-year survival on NON-standardized predictors,
# no grouping structure; rstanarm default (weakly informative) priors.
# 4 chains x 10000 iterations (first half warmup); fixed seed for reproducibility.
m1a <- stan_glm(
  surv ~ length + snow_t + snow_t1 + day + bdload_l + elevation + first + male + donor,
  data = d1,
  family = binomial,
  chains = 4, 
  iter = 5000*2,
  cores = 4,
  seed = 84735)
```

### Priors & chain diagnostics
```{r}
# Priors actually used by the fit (defaults, possibly autoscaled)
prior_summary(m1a)

# Chain diagnostics: trace plots and per-chain density overlays
mcmc_trace(m1a, size = 0.1)
mcmc_dens_overlay(m1a)

# Posterior summaries plus Rhat / n_eff diagnostics
summary(m1a)
```
- mcmc diagnostics all look good.

### List model variable names
```{r}
get_variables(m1a)
```

### Posterior predictive check
```{r}
# Posterior predictive check: replicate mean survival vs observed mean
pp_check(m1a, plotfun = "stat") +
    xlab("Frog survival rate")
```
- Posterior predictive distribution implied by regression model closely matches original data (based on mean frog survival).

### Calculate leave-one-out cross-validation (loo)
```{r}
# PSIS-LOO cross-validation; save_psis = TRUE retains weights for later reuse
loo_m1a <- loo(m1a, cores = 4, save_psis = TRUE)
print(loo_m1a)
```

### Posterior analysis
```{r}
# Tidy coefficient table with 90% uncertainty intervals
tidy(m1a, conf.int = TRUE, conf.level = 0.90)

# Interval plot of posterior medians with 80% / 90% intervals
mcmc_intervals(m1a, point_est = "median", prob = 0.80, prob_outer = 0.9) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  # add 0-line to accentuate default line
  ggtitle("Model with non-standardized predictors only",
          "Posterior distributions with medians & 90% uncertainty intervals")

# Full posterior density for each coefficient
mcmc_areas(m1a, area_method = "equal height", prob = 0.90) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Model with non-standardized predictors only",
          "Posterior distributions with medians & 90% uncertainty intervals")

# Posterior mean Bayesian R-squared
mean(bayes_R2(m1a))
```
- Using non-standardized predictors makes plot hard to read due to vastly different scales of predictors.  
- Based on 90% uncertainty intervals, important predictors are snow_t1, elevation, first, donor70567, and donor72996.

## Model using standardized predictors
### Model specification
```{r}
# Same model as m1a but with standardized predictors (comparable effect sizes);
# still no grouping structure.
m1b <- stan_glm(
  surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + first_c + male_c + donor,
  data = d1,
  family = binomial,
  chains = 4, 
  iter = 5000*2, 
  cores = 4, 
  seed = 84735)
```

### Priors, chain diagnostics, posterior predictive check
```{r}
# Priors actually used by the fit
prior_summary(m1b)

# Chain diagnostics for m1b
mcmc_trace(m1b, size = 0.1)
# Fixed copy-paste error: density overlay was previously computed for m1a
mcmc_dens_overlay(m1b)

summary(m1b)
```
- mcmc diagnostics all look good.

### List model variable names
```{r}
get_variables(m1b)
```

### Posterior predictive check
```{r}
# Posterior predictive check on mean survival for m1b
pp_check(m1b, plotfun = "stat") +
    xlab("Frog survival rate")
```
- Posterior predictive distribution implied by regression model closely matches original data (based on mean frog survival).

### Calculate loo
```{r}
# PSIS-LOO for m1b (used in model comparisons below)
loo_m1b <- loo(m1b, cores = 4, save_psis = TRUE)
print(loo_m1b)
```

### Posterior analysis
```{r}
# Tidy coefficient table with 90% uncertainty intervals
tidy(m1b, conf.int = TRUE, conf.level = 0.90)

# Posterior densities; standardized predictors put effects on a common scale
mcmc_areas(m1b, area_method = "equal height", prob = 0.90) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Model with standardized predictors only",
          "Posterior distributions with medians & 90% uncertainty intervals")

# Posterior mean Bayesian R-squared
mean(bayes_R2(m1b))
```
- As expected, using standardized predictors allows easy comparison across predictors. Important predictors are unchanged. 
- Based on 90% uncertainty intervals, important predictors are snow_t1, elevation, first, donor70567, and donor72996.

## Compare models
```{r}
loo_compare(loo_m1a, loo_m1b)
```
- As expected, models with non-standardized and standardized predictor variables have identical predictive accuracy.  

## Possible grouping structures to add to model
The dataset has three groups: site_id, translocation_id, and donor site. Donor site is not suitable as a grouping variable because there are only 3 levels (i.e., site ids), not enough to make this useful. Site_id and translocation_id contain 12 and 24 levels, respectively, making them suitable as grouping variables. Including both site_id and translocation_id as nested grouping variables (i.e., translocations nested within site ids) also seems justified. Having more levels within a group would seem to increase the power to detect effects of predictors. With fewer levels/less variability, effect may be accounted for primarily by grouping variable itself instead of predictor. With only 1-4 translocations per site and only 12 site ids total, it is possible that model with site_id as sole grouping variable and model with site_id and translocation_id as nested grouping variables will attribute more of the variation in frog survival to the grouping variables and less to the predictors. A priori, that would suggest that the model with translocation_id as the grouping variable might be the most useful model structure (assuming that all models have similar posterior predictive accuracy). Lacking any strong rationale for  one model structure over another, built 3 models and compared their posterior predictive accuracy using loo. 

## Model using standardized predictors and site_id grouping variable
### Model specification
```{r}
# Hierarchical model: standardized predictors + random intercept for site_id
m1c <- stan_glmer(
  surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + first_c + male_c + donor + (1 | site_id),
  data = d1,
  family = binomial,
  chains = 4, 
  iter = 5000*2, 
  cores = 4,
  seed = 84735)
```

### Priors & chain diagnostics
```{r}
prior_summary(m1c)

# Fixed-effect parameter names (factor terms appear as name + level, e.g.
# "male_c0.548"); restricts diagnostic plots to population-level parameters.
# NOTE(review): b1 is re-defined identically in the m1d and m1e chunks.
b1 <- c("(Intercept)", "length_z", "snowt_z", "snowt1_z", "day_z", "bdload_z", "elev_z", "first_c0.353", "male_c0.548", "donor70567", "donor72996")

mcmc_trace(m1c, size = 0.1, pars = b1)
mcmc_dens_overlay(m1c, pars = b1)

summary(m1c)
```

### Posterior predictive check
```{r}
# Posterior predictive check on mean survival for m1c
pp_check(m1c, plotfun = "stat") +
    xlab("Frog survival rate")
```

### Calculate loo
```{r}
# PSIS-LOO for m1c
loo_m1c <- loo(m1c, cores = 4, save_psis = TRUE)
print(loo_m1c)
```

### Posterior analysis
```{r}
# Tidy coefficient table with 90% uncertainty intervals
tidy(m1c, conf.int = TRUE, conf.level = 0.90)

# Posterior densities of population-level (fixed) effects only
mcmc_areas(m1c, area_method = "equal height", prob = 0.90, pars = b1) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Model with standardized predictors & site_id as grouping variable",
          "Posterior distributions with medians & 90% uncertainty intervals")

# Posterior mean Bayesian R-squared
mean(bayes_R2(m1c))

# Frog survival by site_id
mcmc_areas_ridges(m1c, regex_pars = "site_id") +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Frog survival for each recipient site_id")
```
- Based on 90% uncertainty intervals, important predictors reduced to elevation and male. 

## Model using standardized predictors and translocation_id grouping variable
### Model specification
```{r}
# Hierarchical model: standardized predictors + random intercept for translocation_id
m1d <- stan_glmer(
  surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + first_c + male_c + donor + (1 | translocation_id),
  data = d1,
  family = binomial,
  chains = 4, 
  iter = 5000*2, 
  cores = 4,
  seed = 84735)
```

### Priors & chain diagnostics
```{r}
prior_summary(m1d)

# Fixed-effect parameter names (same set as in the m1c chunk)
b1 <- c("(Intercept)", "length_z", "snowt_z", "snowt1_z", "day_z", "bdload_z", "elev_z", "first_c0.353", "male_c0.548", "donor70567", "donor72996")

mcmc_trace(m1d, size = 0.1, pars = b1)
mcmc_dens_overlay(m1d, pars = b1)

summary(m1d)
```

### Posterior predictive check
```{r}
# Posterior predictive check on mean survival for m1d
pp_check(m1d, plotfun = "stat") +
    xlab("Frog survival rate")
```

### Calculate loo
```{r}
# PSIS-LOO for m1d
loo_m1d <- loo(m1d, cores = 4, save_psis = TRUE)
print(loo_m1d)
```

### Posterior analysis
```{r}
# Tidy coefficient table with 90% uncertainty intervals
tidy(m1d, conf.int = TRUE, conf.level = 0.90)

# Posterior densities of population-level (fixed) effects only
mcmc_areas(m1d, area_method = "equal height", prob = 0.90, pars = b1) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Model with standardized predictors & translocation_id as grouping variable",
          "Posterior distributions with medians & 90% uncertainty intervals")

# Posterior mean Bayesian R-squared
mean(bayes_R2(m1d))

# Frog survival by translocation_id
mcmc_areas_ridges(m1d, regex_pars = "translocation_id") +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Frog survival for each translocation_id")
```
- Based on loo, model with translocation_id as grouping variable provides slightly better fit than model with site_id as grouping variable. 
- Based on 90% uncertainty intervals, important predictors are similar to those in model without grouping variable: snowt1_z, elev_z, male_c0.548, donor72996. 

## Model using standardized predictors and 2 grouping variables: translocation_id nested within site_id
### Model specification
```{r}
# Hierarchical model with two grouping variables: random intercepts for both
# translocation_id and site_id (translocations nested within sites)
m1e <- stan_glmer(
  surv ~ length_z + snowt_z + snowt1_z + day_z + bdload_z + elev_z + first_c + male_c + donor + (1 | translocation_id) + (1 | site_id),
  data = d1,
  family = binomial,
  chains = 4, 
  iter = 5000*2, 
  cores = 4,
  seed = 84735)
```

### Priors & chain diagnostics
```{r}
prior_summary(m1e)

# Fixed-effect parameter names (same set as in the m1c/m1d chunks)
b1 <- c("(Intercept)", "length_z", "snowt_z", "snowt1_z", "day_z", "bdload_z", "elev_z", "first_c0.353", "male_c0.548", "donor70567", "donor72996")

mcmc_trace(m1e, size = 0.1, pars = b1)
mcmc_dens_overlay(m1e, pars = b1)

summary(m1e)
```

### Posterior predictive check
```{r}
# Posterior predictive check on mean survival for m1e
pp_check(m1e, plotfun = "stat") +
    xlab("Frog survival rate")
```

### Calculate loo
```{r}
# PSIS-LOO for m1e
loo_m1e <- loo(m1e, cores = 4, save_psis = TRUE)
print(loo_m1e)
```

### Posterior analysis
```{r}
# Tidy coefficient table with 90% uncertainty intervals
tidy(m1e, conf.int = TRUE, conf.level = 0.90)

# Posterior densities of population-level (fixed) effects only
mcmc_areas(m1e, area_method = "equal height", prob = 0.90, pars = b1) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  # Fixed copy-pasted title: m1e uses both grouping variables, not just translocation_id
  ggtitle("Model with standardized predictors & nested grouping variables (translocation_id within site_id)",
          "Posterior distributions with medians & 90% uncertainty intervals")

# Posterior mean Bayesian R-squared
mean(bayes_R2(m1e))

# Frog survival by translocation_id
mcmc_areas_ridges(m1e, regex_pars = "translocation_id") +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  ggtitle("Frog survival for each translocation_id")

# Frog survival by site_id
mcmc_areas_ridges(m1e, regex_pars = "site_id") +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", lwd = 0.5) +  
  # Fixed copy-pasted title: this panel shows site_id, not translocation_id
  ggtitle("Frog survival for each recipient site_id")
```

## Compare 4 models with different grouping structures
```{r}
loo_compare(loo_m1b, loo_m1c, loo_m1d, loo_m1e)
```
- Based on expected log-predictive densities (ELPD), 3 models with grouping variables all have substantially higher posterior predictive accuracy than model without any grouping variables (elpd_diff of m1b is more than 2 SE lower than elpd_diff of m1c: elpd_diff = -18.2 $\pm$ 12.6, 2 SE interval does not overlap 0: -5.6 -- -30.8). 
- 3 models with 1 or more grouping variables are equivalent and have similar posterior predictive accuracy (elpd_diff of m1e and m1d are within 2 SE of elpd_diff of m1c).  
- m1d seems to provide more insight into the effect of the predictors, so chose this model for additional analysis. 

## Additional analysis of model m1d
### Posterior classification accuracy
```{r}
# Posterior predictive draws for each frog in dataset
set.seed(84735)
survival_predict_1 <- posterior_predict(m1d, newdata = d1)

# Classify frog survival based on mean predicted survival probability (cutoff 0.5)
survival_predict_2 <- d1 %>% 
  mutate(survival_prob = colMeans(survival_predict_1),
         survival_class = as.numeric(survival_prob >= 0.5)) %>% 
  select(site_id, translocation_id, year, snowt1_z, elev_z, male_c, donor, surv, survival_prob, survival_class)

# Generate confusion matrix and calculate accuracy
survival_predict_2 %>% 
  tabyl(surv, survival_class) %>% 
  adorn_totals(c("row", "col"))

# Compute accuracy rates from the classifications rather than hard-coding the
# confusion-matrix cell counts (so the chunk stays correct if the data change)
with(survival_predict_2, mean(survival_class == surv))          # overall accuracy
with(survival_predict_2, mean(survival_class[surv == 0] == 0))  # true negative rate (specificity)
with(survival_predict_2, mean(survival_class[surv == 1] == 1))  # true positive rate (sensitivity)

# Generate confusion matrix using function
set.seed(84735)
classification_summary(m1d, data = d1, cutoff = 0.5)
```
- Sensitivity or "true positive rate" measures the proportion of Y = 1 observations that are accurately classified.  
- Specificity or "true negative rate" measures the proportion of Y = 0 observations that are accurately classified (i.e., proportion of frogs correctly identified as not surviving).  

### Create reduced model containing only important predictors for subsequent analyses
- Reduced model is helpful because it reduces the number of values used to calculate predictive models. 
```{r}
# Reduced version of m1d keeping only the predictors whose 90% intervals
# excluded 0 (snowt1_z, elev_z, male_c, donor) plus the grouping variable
m1d_reduce <- stan_glmer(
  surv ~ snowt1_z + elev_z + male_c + donor + (1 | translocation_id),
  data = d1,
  family = binomial,
  chains = 4, 
  iter = 5000*2, 
  cores = 4,
  seed = 84735)
```

### Confirm fit of reduced model is comparable to full model
```{r}
# PSIS-LOO for the REDUCED model. Bug fix: this was previously computed from
# the full model m1d, so loo_compare below compared m1d against itself
# (hence the two identical rows both labeled "m1d" in the old output).
loo_m1d_reduce <- loo(m1d_reduce, cores = 4, save_psis = TRUE)
print(loo_m1d_reduce)

loo_compare(loo_m1d, loo_m1d_reduce)
```

### Simulate and plot posterior predictive models & observations
```{r}
# Group-level prediction data: one row per translocation with observed survival
# proportion (psurv) and group means of the continuous predictors; sex fixed at
# the "male" level and donor assigned from the translocation_id.
m1d_group_means <- d1 %>% 
  group_by(translocation_id) %>% 
  summarize(count = n(), psurv = mean(surv), elev_z = mean(elev_z), snowt1_z = mean(snowt1_z)) %>% 
  mutate(
    male_c = as.factor(0.548), 
    donor = case_when(
      str_detect(translocation_id, "74976") | str_detect(translocation_id, "70556") ~ "70459",
      str_detect(translocation_id, "70413") ~ "70567",
      TRUE ~ "72996")) %>% 
  arrange(psurv)  # to arrange plot values/labels by psurv

# Use posterior_epred() instead of posterior_predict(): y is a survival
# *proportion* per translocation, so intervals should reflect the posterior of
# the survival probability, not 0/1 Bernoulli draws (this addresses the note
# that the uncertainty intervals looked incorrect for a binomial model).
predictions_m1d <- posterior_epred(m1d_reduce, newdata = m1d_group_means)

ppc_intervals(
  y = m1d_group_means$psurv,
  yrep = predictions_m1d, 
  prob = 0.5, 
  prob_outer = 0.8) +
  ggplot2::scale_x_continuous(
    labels = m1d_group_means$translocation_id,
    breaks = 1:nrow(m1d_group_means)) +
  xaxis_text(angle = 90, vjust = 0.5) +
  xlab("Translocation_id")
```
- Uncertainty intervals appear incorrect for binomial model, but not sure how to adjust.  
- Based on mean values, model does an adequate job of predicting survival based on group-level and individual-level characteristics.  
- What explains the low predicted survival at 70370 (both translocations), and high versus low predicted survival at 70550 for 1st and 2nd translocations, respectively? Both sites seem to contain high quality habitat (i.e., high elevation).  
- The predicted survival is based on assumption that only males are included. Run another set of models for females?  

### Plot posterior plausible relationships between each predictor and the probability of frog survival (conditional effects)
- As described in https://rdrr.io/cran/brms/man/conditional_effects.brmsfit.html, "When creating conditional_effects for a particular predictor (or interaction of two predictors), one has to choose the values of all other predictors to condition on. By default, the mean is used for continuous variables and the reference category is used for factors, but you may change these values via argument conditions. This also has an implication for the points argument: In the created plots, only those points will be shown that correspond to the factor levels actually used in the conditioning, in order not to create the false impression of bad model fit, where it is just due to conditioning on certain factor levels."  
- Using rstanarm, I was unable to do this the way I expected: starting with data = d1 and using "newdata = " to specify that predictions should be based on the data in d1_elevz (for example), in which the non-plotted continuous predictors are set to their mean values. I am not sure whether my alternative approach (below) is correct.  
- Approach used: Instead of starting the routine with d1, I started with the new dataset in which all variables except the one of interest were set to mean values (continuous variables) and the reference level (categorical variables). This produced plots/results that were very similar to those produced in brms using the conditional_effects function. 

#### Effect of elevation
```{r}
# Condition on mean winter severity, males, and the donor 70459 so that
# only elevation varies across rows
d1_elevz <- d1 %>% 
  mutate(snowt1_z = mean(snowt1_z),
         male_c = recode(male_c, "-0.452" = "0.548"),
         donor = recode(donor, "70567" = "70459", "72996" = "70459"))

d1_elevz %>% distinct(snowt1_z, male_c, donor)  # verify conditioning values

# Spaghetti plot: 100 posterior draws of expected survival vs elevation;
# re_formula = NA excludes the translocation-level random effects
d1_elevz %>% 
  select(site_id, translocation_id, snowt1_z, elev_z, male_c, donor, surv) %>% 
  add_epred_draws(m1d_reduce, ndraws = 100, re_formula = NA) %>% 
  ggplot(aes(x = elev_z, y = surv)) +
  geom_line(aes(y = .epred, group = .draw), color = "blue", size = 0.2, alpha = 0.5) +
  labs(x = "Elevation of recipient site (standardized)", y = "Probability of 1-year frog survival")

# NOTE(review): an earlier attempt used add_predicted_draws() with
# newdata = d1_elevz for the same purpose but was unsuccessful; removed here.
```

#### Effect of winter severity in year t1
```{r}
# Condition on mean elevation, males, and the donor 70459 so that only
# winter severity varies across rows
d1_snowt1z <- d1 %>% 
  mutate(elev_z = mean(elev_z),
         male_c = recode(male_c, "-0.452" = "0.548"),
         donor = recode(donor, "70567" = "70459", "72996" = "70459"))

d1_snowt1z %>% distinct(elev_z, male_c, donor)  # verify conditioning values

# Spaghetti plot: 100 posterior draws of expected survival vs winter severity;
# re_formula = NA excludes the translocation-level random effects
d1_snowt1z %>% 
  select(site_id, translocation_id, snowt1_z, elev_z, male_c, donor, surv) %>% 
  add_epred_draws(m1d_reduce, ndraws = 100, re_formula = NA) %>% 
  ggplot(aes(x = snowt1_z, y = surv)) +
  geom_line(aes(y = .epred, group = .draw), color = "blue", size = 0.2, alpha = 0.5) +
  labs(x = "Winter severity in year following translocation (standardized)", y = "Probability of 1-year frog survival") 
```

#### Effect of sex
```{r}
# Condition on mean elevation, mean winter severity, and the donor 70459
# so that only sex varies across rows
d1_malec <- d1 %>% 
  mutate(elev_z = mean(elev_z),
         snowt1_z = mean(snowt1_z),
         donor = recode(donor, "70567" = "70459", "72996" = "70459"))

d1_malec %>% distinct(elev_z, snowt1_z, donor)  # verify conditioning values

# Boxplots of expected survival by sex; re_formula = NA excludes the
# translocation-level random effects
d1_malec %>% 
  select(site_id, translocation_id, snowt1_z, elev_z, male_c, donor, surv) %>% 
  add_epred_draws(m1d_reduce, ndraws = 100, re_formula = NA) %>% 
  ggplot(aes(x = male_c, y = .epred)) +
  geom_boxplot() +
  labs(x = "Sex (centered)", y = "Probability of 1-year frog survival")
```

#### Effect of donor
```{r}
# Condition on mean elevation, mean winter severity, and males so that
# only the donor population varies across rows
d1_donor <- d1 %>% 
  mutate(elev_z = mean(elev_z),
         snowt1_z = mean(snowt1_z),
         male_c = recode(male_c, "-0.452" = "0.548"))

d1_donor %>% distinct(elev_z, snowt1_z, male_c)  # verify conditioning values

# Boxplots of expected survival by donor population; re_formula = NA
# excludes the translocation-level random effects
d1_donor %>% 
  select(site_id, translocation_id, snowt1_z, elev_z, male_c, donor, surv) %>% 
  add_epred_draws(m1d_reduce, ndraws = 100, re_formula = NA) %>% 
  ggplot(aes(x = donor, y = .epred)) +
  geom_boxplot() +
  labs(x = "Donor population", y = "Probability of 1-year frog survival")
```

#### Joint effects of elevation and sex
```{r}
# Condition on mean winter severity and the donor 70459 so that the joint
# effects of elevation and sex can be visualized
d1_elevz_malec <- d1 %>% 
  mutate(snowt1_z = mean(snowt1_z),
         donor = recode(donor, "70567" = "70459", "72996" = "70459"))

d1_elevz_malec %>% distinct(snowt1_z, donor)  # verify conditioning values

pal <- c("#0d0887", "#f89540")

# One spaghetti line per posterior draw within each sex
d1_elevz_malec %>% 
  select(site_id, translocation_id, snowt1_z, elev_z, male_c, donor, surv) %>% 
  add_epred_draws(m1d_reduce, ndraws = 100, re_formula = NA) %>% 
  ggplot(aes(x = elev_z, y = surv, color = male_c)) +
  geom_line(aes(y = .epred, group = paste(male_c, .draw)), size = 0.4, alpha = 0.5) +
  labs(x = "Elevation of recipient site (standardized)",
       y = "Probability of 1-year frog survival") +
  scale_color_manual(values = pal) +
  theme_classic()
```


## For comparison, fit model using brms and conditional_effects function to plot variable-specific effects
### Fit model
```{r}
# Same reduced model fit with brms (bernoulli family is brms's equivalent of
# binomial with one trial per row), for use with conditional_effects()
m1d_reduce_brms <- brm(
  surv ~ snowt1_z + elev_z + male_c + donor + (1 | translocation_id),
  data = d1,
  family = bernoulli(),
  chains = 4,
  iter = 10000,  # per chain; first half is warmup by default
  cores = 4,
  seed = 84735)
```

### Plot conditional effects
```{r}
plot(conditional_effects(m1d_reduce_brms, spaghetti = TRUE, mean = TRUE, ndraws = 100), ask = FALSE)
```


## TO DO
- Validate priors, tune as necessary. Add final priors to model specification (in case the defaults change).  
- Check approach to plotting conditional effects.  
- Create analysis in which survival from first translocation to a site is a predictor of survival from subsequent translocations. For this analysis, will need to drop all sites at which only a single translocation was conducted.  
- Try un-standardizing predictor values in conditional-effects plots (and other plots?) to produce more intuitive visualizations.  








